{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "8f2b4d1e",
   "metadata": {},
   "source": [
    "<a href=\"https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/examples/data_connectors/FaissDemo.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5d974136",
   "metadata": {},
   "source": [
    "# Faiss Reader"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "1f59b68b",
   "metadata": {},
   "source": [
    "If you're opening this notebook on Colab, you will probably need to install LlamaIndex 🦙 and the Faiss reader integration first."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c50f69e6",
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install llama-index-readers-faiss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4c4defdb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# %pip (not !pip) installs into the environment of the running kernel.\n",
    "# faiss-cpu provides the `faiss` module imported further down.\n",
    "%pip install llama-index faiss-cpu"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4026b434",
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "import sys\n",
    "\n",
    "# Send INFO-level (and above) log records to stdout so they appear in the cell output.\n",
    "# basicConfig() already attaches a stdout handler to the root logger, so no extra\n",
    "# StreamHandler is added here (a second one would print every record twice).\n",
    "# force=True replaces any handlers that were installed earlier, which keeps the\n",
    "# cell idempotent when it is re-run.\n",
    "logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b541d8ec",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from IPython.display import Markdown, display\n",
    "from llama_index.core import SummaryIndex\n",
    "from llama_index.readers.faiss import FaissReader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "90d37078",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the Faiss index.\n",
    "# A guide for how to get started with Faiss is here: https://github.com/facebookresearch/faiss/wiki/Getting-started\n",
    "# The toy example below runs as-is; replace `docs` and `id_to_text_map` with your own data.\n",
    "\n",
    "import faiss\n",
    "\n",
    "# Dimensionality of the embedding vectors.\n",
    "d = 8\n",
    "# One embedding per row. Faiss works on 2D float32 arrays.\n",
    "docs = np.array(\n",
    "    [\n",
    "        [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1],\n",
    "        [0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2],\n",
    "        [0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3],\n",
    "        [0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.4],\n",
    "        [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5],\n",
    "    ],\n",
    "    dtype=\"float32\",\n",
    ")\n",
    "# id_to_text_map is used for query retrieval. Its keys must be the integer ids\n",
    "# that the Faiss search returns; for a flat index these are the row positions\n",
    "# of the vectors in insertion order (0, 1, 2, ...).\n",
    "id_to_text_map = {\n",
    "    0: \"aaaaaaaaa bbbbbbb cccccc\",\n",
    "    1: \"foooooo barrrrrr\",\n",
    "    2: \"tmp tmptmp tmp\",\n",
    "    3: \"hello world hello world\",\n",
    "    4: \"cat dog cat dog\",\n",
    "}\n",
    "# build the index\n",
    "index = faiss.IndexFlatL2(d)\n",
    "index.add(docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fd470a09",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hand the Faiss index to the reader; load_data() will run its searches against it.\n",
    "reader = FaissReader(index=index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c33084c5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# To load data from the Faiss index, you must specify:\n",
    "# k: top nearest neighbors. Keep k <= the number of indexed vectors: Faiss pads\n",
    "#    missing neighbors with id -1, which has no entry in id_to_text_map.\n",
    "# query: a 2D float32 embedding representation of your queries (rows are queries).\n",
    "#    Each row must have the same dimensionality as the indexed vectors\n",
    "#    (8 in the example index).\n",
    "k = 4\n",
    "query1 = np.array([0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1], dtype=\"float32\")\n",
    "query2 = np.array([0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5], dtype=\"float32\")\n",
    "query = np.array([query1, query2])\n",
    "\n",
    "documents = reader.load_data(query=query, id_to_text_map=id_to_text_map, k=k)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0b74697a",
   "metadata": {},
   "source": [
    "### Create index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e85d7e5b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Note: `index` is rebound here, from the Faiss index to the LlamaIndex SummaryIndex\n",
    "# built over the retrieved documents.\n",
    "index = SummaryIndex.from_documents(documents=documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "31c3b68f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Switch the logging level to DEBUG if you want more detailed output.\n",
    "query_text = \"<query_text>\"\n",
    "query_engine = index.as_query_engine()\n",
    "response = query_engine.query(query_text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "56fce3fb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Render the query response in bold as rich Markdown output.\n",
    "bold_response = Markdown(f\"<b>{response}</b>\")\n",
    "display(bold_response)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
